from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from ximalaya_scraper.spiders.ximalaya_spider import XimalayaSpider
import sqlite3
import os

def check_database(db_file='ximalaya_data.db'):
    """Print how many albums and tracks the SQLite database currently holds.

    This is a purely informational status report: it never modifies the
    database and never raises because of a database problem.

    Args:
        db_file: Path to the SQLite database file. Defaults to
            'ximalaya_data.db', resolved against the current working
            directory.

    Returns:
        None. The result is reported on standard output:
        * the file does not exist -> a notice that a new database will be
          created (the file itself is NOT created here);
        * the counts can be read -> the album and track totals;
        * the file exists but cannot be queried (e.g. the ``albums`` or
          ``tracks`` table is missing, or the file is not a valid SQLite
          database) -> a message describing the SQLite error.
    """
    # sqlite3.connect() would silently create an empty file, so the existence
    # check has to happen before connecting.
    if not os.path.exists(db_file):
        print("数据库文件不存在，将创建新数据库")
        return

    conn = sqlite3.connect(db_file)
    try:
        cur = conn.cursor()

        # Number of stored albums.
        cur.execute("SELECT COUNT(*) FROM albums")
        album_count = cur.fetchone()[0]

        # Number of stored audio tracks.
        cur.execute("SELECT COUNT(*) FROM tracks")
        track_count = cur.fetchone()[0]
    except sqlite3.Error as exc:
        # A missing table or unreadable file must not abort the caller:
        # this function only reports status.
        print(f"无法读取数据库统计信息: {exc}")
        return
    finally:
        # Always release the connection, including when a query fails.
        conn.close()

    print(f"当前数据库中有 {album_count} 个专辑, {track_count} 个音频")

if __name__ == "__main__":
    # Script entry point: report the database state, run the spider, then
    # report the database state again so the two printouts can be compared.
    print("开始爬取喜马拉雅数据...")
    check_database()

    # Build the crawler from the project's settings and schedule the spider.
    project_settings = get_project_settings()
    crawler_process = CrawlerProcess(project_settings)
    crawler_process.crawl(XimalayaSpider)
    # NOTE(review): start() is expected to block until the crawl finishes,
    # which is what makes the completion message below accurate -- confirm
    # against the Scrapy CrawlerProcess documentation.
    crawler_process.start()

    print("爬取完成！")
    check_database()